*Fake Fake News Warnings: Misinformation Warnings and the Tainted Truth Effect 

*An online survey experiment conducted through Amazon Mechanical Turk
*by Melanie Freeze and the POSC 226 (Political Psychology) class at Carleton College.

*Note: This file produces the cleaned data that is used to run the analysis and build the graphs in R using tte_TablesGraphs.R
*Several of the key analyses are also replicated in this file, and the materials for the online supplementary materials are created here as well.

****
**** Import data
****
* Set the working directory here: add the path of the folder that holds the raw CSV after cd
cd

import delimited "POSC226Study.csv"

/* Names of important raw variables
   V10    Finished survey (0 = no, 1 = yes)
   Q9     Did you have any technical problems with the video? 1) YES 2) NO
   Q8_3   Video page time at submit, in seconds (should be greater than 240 if all 4 minutes were watched)
   Q15_3  News article time - control (vague / no information)
   Q16_3  News article time - detailed with correct information
   Q17_3  News article time - detailed with misinformation on critical questions
*/




* Cross-tabulate the condition description against its numeric code to identify the six experimental conditions
		tabulate randomdescription random

* Basic identification and participant indicators
	* Identification number: the row position in the imported file
	generate id = _n
	* Indicator for people who participated on the first day for $.30 (all others got $.50).
	* The flag is 1 when the text in v8 contains 4/25 and 0 otherwise.
	* NOTE(review): the earlier note here read "87 on April 26 for $.30", but the code matches 4/25 - confirm which date is intended.
	generate firstday = (strpos(v8, "4/25") > 0)
	tabulate firstday
		
**
* Identify the valid participant subset to be analyzed
**

	* Flag individuals who dropped out, had problems, or were not fully exposed to the essential experimental materials
		* Seconds spent on the video page (should be greater than 240 if all 4 minutes were watched)
		generate videopagetime = q8_3

		* Seconds spent reading the news article; each pair of conditions has its own timer variable
		* random: 1 = Control, Warning           2 = Control, No Warning
		*         3 = Information, Warning       4 = Information, No Warning
		*         5 = Misinformation, Warning    6 = Misinformation, No Warning
		generate newstime = .
		replace newstime = q15_3 if inlist(random, 1, 2) & q15_3 != .
		replace newstime = q16_3 if inlist(random, 3, 4) & q16_3 != .
		replace newstime = q17_3 if inlist(random, 5, 6) & q17_3 != .

	* Technical problems with the video (q9 == 1)
		* id 120 saw the video fine and only wanted closed captions, so that case is not counted as a problem
		generate techprob = (q9 == 1 & id != 120)

	* Subset filter: drop2 = 1 means the participant should be dropped for insufficient exposure to the experimental materials.
	* A participant is flagged for any of: under 10 seconds on the article, a technical problem, under 240 seconds on the
	* video page, q66 == 2 (no audio on), or being id 88 or 232 (both left the survey immediately; all of their data is missing).
	* Stata orders missing above every number, so a missing time does not satisfy either less-than test.
	generate drop2 = (newstime < 10 | techprob == 1 | videopagetime < 240 | q66 == 2 | id == 88 | id == 232)

	* The 10 second newstime threshold is rather arbitrary, but it is conservative: the check below shows the mean time spent is greater than 38 seconds in all conditions.
			*tabstat newstime, by(randomdescription) statistics(mean p1 p10 p25 count)

**
*Create DEPENDENT variables here
**

	* Accuracy dummies (prefix c): 1 when the response equals 1 (the accurate original event memory/fact response),
	* 0 for any other answer, and missing when the item itself is missing
	foreach item in q21 q22 q23 q24 q25 q26 q27 q28 q29 q30 q31 q32 q34 q35 q36 q37 q38 q39 q40 q41 {
		generate c`item' = (`item' == 1) if `item' != .
	}

	* Misinformation dummies (prefix m): 1 when the response equals 2 (the MISLEADING fact response presented in the
	* Misinformation Condition), 0 for any other answer, missing when the item is missing. Experimental fact subset only.
	foreach item in q21 q22 q23 q24 q25 q26 q27 q28 q29 q31 q32 {
		generate m`item' = (`item' == 2) if `item' != .
	}


	/*
			memcrit    - Memory score for the 11 experimental subset facts (note: critical facts are the same as experimental subset facts)
			membuf     - Memory score for the 9 fixed subset facts (note: buffer facts are the same as fixed facts)
			memmisinfo - Misinformation score for the 11 experimental subset facts

			Bustos/ACA     Critical/Exp: c21-c24;      Buffer/Fixed: c34-c36
			Poe/UN         Critical/Exp: c25-c28;      Buffer/Fixed: c37-c38
			Malory/Subway  Critical/Exp: c29, c31-c32; Buffer/Fixed: q30, q39-q41
	*/

	*Memory score for 11 experimental items
		*Experimental Facts Memory Score (in the past referred to as critical facts)--percentage of 11 questions that were accurate
			*rowmean averages over the non-missing items only, so a skipped item is left out of the denominator
			egen memcrit=rowmean(cq21 cq22 cq23 cq24 cq25 cq26 cq27 cq28 cq29 cq31 cq32 )
			replace memcrit=memcrit*100
				*identify which participants failed to answer at least one of the memory recognition questions
				*the count must span all 11 experimental items (cq32 included); with only 10 items listed the
				*count could never reach 11 and every participant would be flagged as incomplete
				egen memcritcount=rownonmiss(cq21 cq22 cq23 cq24 cq25 cq26 cq27 cq28 cq29 cq31 cq32)
				gen incomplete=0 
				replace incomplete=1 if memcritcount<11

		*Memory score for all facts (20 items: 11 experimental + 9 fixed)
			egen memall=rowmean(cq21 cq22 cq23 cq24 cq25 cq26 cq27 cq28 cq29 cq30 cq31 cq32 cq34 cq35 cq36 cq37 cq38 cq39 cq40 cq41)
			replace memall=memall*100
				*identify which participants failed to answer at least one of the memory recognition questions
				*20 items are counted, so anything below 20 means at least one item was left unanswered
				egen memallcount=rownonmiss(cq21 cq22 cq23 cq24 cq25 cq26 cq27 cq28 cq29 cq30 cq31 cq32 cq34 cq35 cq36 cq37 cq38 cq39 cq40 cq41)
				gen incomplete_all=0 
				replace incomplete_all=1 if memallcount<20
			
			*Fixed Facts Memory Score (in the past referred to as control facts--buffer facts); set across all conditions
			egen membuf=rowmean(cq30 cq34 cq35 cq36 cq37 cq38 cq39 cq40 cq41)
			replace membuf=membuf*100
			*number of the 9 fixed items that were answered
			egen membufcount=rownonmiss(cq30 cq34 cq35 cq36 cq37 cq38 cq39 cq40 cq41)
			
	*Misinformation Score for Experimental facts: percentage of the 11 experimental items answered with the misleading response
			egen memmisinfo=rowmean(mq21  mq22  mq23 mq24 mq25 mq26 mq27 mq28 mq29 mq31 mq32)
			replace memmisinfo = memmisinfo*100
		

	* Credibility of the video (q67 items) and of the news article (q87 items)
			* Item wording as recorded in this file: In your opinion, how well do the words listed below describe the news
			* article you read? (believable, accurate, trustworthy, biased, reliable, authoritative, honest, valuable,
			* informative, professional, interesting)
			* Scale: 1 not at all, 2 slightly well, 3 fairly well, 4 considerably well, 5 extremely
			* Item 4 (biased) is reverse coded in both batteries so that higher always means more credible
				recode q67_4 q87_4 (1=5) (2=4) (3=3) (4=2) (5=1), generate(q67_4r q87_4r)

			* Video: mean of the 11 items (using the reversed item 4) and the number of items answered
			egen credvid_clean = rowmean( ///
				q67_1 q67_2 q67_3 q67_4r q67_5 q67_6 q67_7 q67_8 q67_9 q67_10 q67_11)
			egen credvid_count = rownonmiss( ///
				q67_1 q67_2 q67_3 q67_4r q67_5 q67_6 q67_7 q67_8 q67_9 q67_10 q67_11)


	* News article: mean of the 11 items (using the reversed item 4) and the number of items answered
			egen crednews_clean = rowmean( ///
				q87_1 q87_2 q87_3 q87_4r q87_5 q87_6 q87_7 q87_8 q87_9 q87_10 q87_11)
			egen crednews_count = rownonmiss( ///
				q87_1 q87_2 q87_3 q87_4r q87_5 q87_6 q87_7 q87_8 q87_9 q87_10 q87_11)

	* Memory uncertainty (DK score)
			* Response distributions for the experimental memory questions among retained participants; very few are missing
			foreach item in q21 q22 q23 q24 q25 q26 q27 q28 q29 q31 q32 {
				tabulate `item' if drop2 == 0, missing
			}

				* Uncertainty dummies - experimental facts: 1 when the response is 4, otherwise 0
				foreach item in q21 q22 q23 q24 q25 q26 q27 q28 q29 q31 q32 {
					generate dk`item' = (`item' == 4)
				}

				* Uncertainty dummies, version 2 - experimental facts: item non-response also counts as 1
				foreach item in q21 q22 q23 q24 q25 q26 q27 q28 q29 q31 q32 {
					generate dkna`item' = (`item' == 4 | `item' == .)
				}


			* Same steps for the fixed subset
			foreach item in q30 q34 q35 q36 q37 q38 q39 q40 q41 {
				tabulate `item' if drop2 == 0, missing
			}
				* Uncertainty dummies - fixed facts
				foreach item in q30 q34 q35 q36 q37 q38 q39 q40 q41 {
					generate dk`item' = (`item' == 4)
				}

				* Uncertainty dummies, version 2 - fixed facts (also includes item non-response)
				foreach item in q30 q34 q35 q36 q37 q38 q39 q40 q41 {
					generate dkna`item' = (`item' == 4 | `item' == .)
				}

		* Final uncertainty scores: percentage of items in each subset flagged by the dummies above
		egen dkcrit = rowmean(dkq21 dkq22 dkq23 dkq24 dkq25 dkq26 dkq27 dkq28 dkq29 dkq31 dkq32)
		replace dkcrit = 100 * dkcrit

		egen dkbuf = rowmean(dkq30 dkq34 dkq35 dkq36 dkq37 dkq38 dkq39 dkq40 dkq41)
		replace dkbuf = 100 * dkbuf

		* Versions that count both the response coded 4 and item non-response
		egen dknacrit = rowmean(dknaq21 dknaq22 dknaq23 dknaq24 dknaq25 dknaq26 dknaq27 dknaq28 dknaq29 dknaq31 dknaq32)
		replace dknacrit = 100 * dknacrit

		egen dknabuf = rowmean(dknaq30 dknaq34 dknaq35 dknaq36 dknaq37 dknaq38 dknaq39 dknaq40 dknaq41)
		replace dknabuf = 100 * dknabuf


	* Variable labels for the three memory scores
	label variable memall "All Facts"
	label variable memcrit "Experimental Facts Subset"
	label variable membuf "Fixed Facts Subset"
		 
**
* Create INDEPENDENT variables here
**


		* Main effects
			* (Mis)information factor: random 5-6 -> 1 Misinformation, 1-2 -> 2 Control, 3-4 -> 3 Information
			recode random (1/2=2) (3/4=3) (5/6=1), generate(misinfomain)
			label define misinfomainlbl 1 "Misinformation" 2 "Control" 3 "Information"
			label values misinfomain misinfomainlbl

			* Warning factor: odd values of random received the warning, even values did not
			recode random (2 4 6=0) (1 3 5=1), generate(warnmain)
			label define warnmainlbl 0 "No Warning" 1 "Warning"
			label values warnmain warnmainlbl

		* Six-cell condition variable ordered by warning first, then information content
		recode random (1=5) (2=2) (3=6) (4=3) (5=4) (6=1), generate(condition)
		label define conditionlbl  1 "No/Misinfo" 2 "No/Noinfo" 3 "No/Info" 4 "Yes/Misinfo" 5 "Yes/Noinfo" 6 "Yes/Info"
		label values condition conditionlbl
		label variable condition "Conditions"

		* String copy of the condition labels
		decode condition, generate(conditionf)


		* Six-cell condition variable ordered by information content first, then warning
		recode random (1=4 ) (2=3 ) (3=6 ) (4=5 ) (5=2 ) (6=1 ), generate(cond)
		label define condlbl 1 "No Warning, Misinformation" 2 "Warning, Misinformation"  3  "No Warning, Control"  4 "Warning, Control" 5 "No Warning, Information" 6 "Warning, Information"
		label values cond condlbl
		label variable cond "Warning and Informational Content Experimental Conditions"
		* Check that the two orderings line up cell by cell
		tabulate cond condition

		* Dummies and interaction terms used in the regression models (interaction specifications)
		generate misinformed = (misinfomain == 1)
		generate notinformed = (misinfomain == 2)
		generate informed = (misinfomain == 3)


		generate warned_misinformed = warnmain * misinformed
		generate warned_notinformed = warnmain * notinformed
		generate warned_informed = warnmain * informed


		label variable warnmain "Warning"
		label variable notinformed "Control"
		label variable misinformed "Misinformation"
		label variable informed "Information"


**
* Control variables
**

	* Seven-point party identification built from q45 (1=rep, 2=dem, 3=indp, 4=other), q47 and q49.
	* Judging by the value labels, q47 separates strong (1) from weak (2) partisans, and q49 records the lean of
	* independents/others (1 = Republican, 2 = Democratic, 3 = neither) - confirm against the questionnaire.
	* Any combination not listed stays missing.
	generate pid7 = .
	replace pid7 = 1 if q45 == 2 & q47 == 1
	replace pid7 = 2 if q45 == 2 & q47 == 2
	replace pid7 = 3 if inlist(q45, 3, 4) & q49 == 2
	replace pid7 = 4 if inlist(q45, 3, 4) & q49 == 3
	replace pid7 = 5 if inlist(q45, 3, 4) & q49 == 1
	replace pid7 = 6 if q45 == 1 & q47 == 2
	replace pid7 = 7 if q45 == 1 & q47 == 1
	label define pid7lbl 1 "StgDem" 2 "WkDem" 3 "IndDem" 4 "IndPure" 5 "IndRep" 6 "WkRep" 7 "StgRep"
	label values pid7 pid7lbl
	
	
	* Ideological self-identification (q51), seven categories labelled below
	generate selfid = q51
	label define selfidlbl 1 "ExtLib" 2 "Lib" 3 "StyLib" 4 "Mod" 5 "StyCon" 6 "Con" 7 "ExtCon"
	label values selfid selfidlbl

	* newsdays is q75 shifted down by one
	generate newsdays = q75 - 1

		* female: 1 when q59 == 2, otherwise 0 (a missing q59 is also coded 0)
		generate female = (q59 == 2)

		* Attention to political campaigns (q71: Some people do not pay much attention to political campaigns. How about you? ...)
		* Codes 1 and 3 are swapped; other values are left unchanged
		recode q71 (1=3) (3=1), generate(attention)

		* Age group (q61)
		generate age = q61
			label define agelbl 1 "18-25" 2 "26-34" 3 "35-54" 4 "55-64" 5 "65 or over"
			label values age agelbl

		* married: 1 when q63 == 4, otherwise 0
		generate married = (q63 == 4)

		* Education (q53)
		generate educ = q53
		label define educlbl 1 "Grammar School" 2 "High School or equivalent" 3 "Vocational/Technical School" 4 "Some College" 5 "College Graduate (4 year)" 6 "Master's Degree (MS)" 7 "Doctoral Degree (PhD)" 8 "Professional Degree (MD, JD, etc.)" 9 "Other" 
		label values educ educlbl
		
		
		*education 1 = grammar  school, 2 = hs, 3=voc/tech, 4= some college, 5= college grad, 6= MS, 7 = phd, 8= professional, 9 = other
		*collegeplus: 1 for a college degree or higher (codes 5-8), 0 otherwise.
		*The upper bound q53<9 excludes both "other" (9) and missing responses: Stata treats missing as larger than
		*any number, so the test q53>4 on its own is also true when q53 is missing.
		gen collegeplus = 0 
		replace collegeplus =1 if q53>4 & q53<9
	
		
		*collegegrad: same definition (codes 5-8); this is the indicator used in the model of excluded participants
		gen collegegrad=0
		replace collegegrad=1 if q53>4 &  q53<9
		
		* Household income category (q55)
		generate inc = q55
			label define inclbl 1 "Rather not say"  2" Under $10,000" 3 "$10,000 - $19,999" 4 "$20,000 - $29,999" 5 "$30,000 - $39,999" 6 "$40,000 - $49,999" 7 "$50,000 - $74,999" 8 "$75,000 - $99,999" 9	"$100,000 - $150,000" 10 "Over $150,000"
			label values inc inclbl

		* over50k: 1 for income categories 7-10, 0 otherwise, and missing for those who would rather not say (inc == 1)
		generate over50k = (inc > 6 & inc < 11) if inc != 1
		* unemp: 1 when q57 == 4, otherwise 0
		generate unemp = (q57 == 4)
		* reg: 1 when q65 == 3, otherwise 0. A strangely high share (88%) report being registered to vote.
		generate reg = (q65 == 3)
		* citizen and mobile: 1 stays 1 and 2 becomes 0
		recode q67 q69 (1=1) (2=0), generate(citizen mobile)
		* Race (q73) and a white indicator
		generate race = q73
		label define racelbl 1 "white, nh" 2 "black" 3 "hispanic"  4 "native am" 5 "asian" 6 "other"
		label values race racelbl
		generate white = (q73 == 1)
		
		* Low need for cognition: 1 when q76 == 2 or q77 == 1, otherwise 0
		generate lowcog = (q76 == 2 | q77 == 1)

		* Political knowledge: count of the five items answered with the keyed response (range 0-5)
		* q78 is left out because it is a bad question: not clear if Washington state or DC
		generate know = (q79 == 1) + (q82 == 1) + (q81 == 2) + (q83 == 3) + (q84 == 1)

		* Strength measures: distance from the midpoint (4) of the 7-point scales
		generate partisan = abs(pid7 - 4)
		generate ideological = abs(selfid - 4)
		* Ideology group dummies from selfid: 4 = moderate, 1-3 = liberal, 5-7 = conservative
		generate mod = (selfid == 4)
		generate lib = (selfid > 0 & selfid < 4)
		generate consv = (selfid > 4 & selfid < 8)


		
	***ANALYSES

*Table 1 and SI-8 (Correct and Incorrect Memory)
	* The same specification is fitted to three outcomes among retained participants (drop2 == 0):
	*   memcrit    - Memory Score: Experimental
	*   membuf     - Memory Score: Fixed
	*   memmisinfo - Misinformation Score: Experimental
	* The control (not informed) group is the omitted category.
	local rhs warnmain misinformed informed  warned_misinformed warned_informed
	foreach outcome in memcrit membuf memmisinfo {
		regress `outcome' `rhs' if drop2==0
			* Marginal effect of the warning within the misinformation, control and information conditions
			lincom warnmain + warned_misinformed
			lincom warnmain
			lincom warnmain + warned_informed

		* Only for the experimental memory score: compare the marginal effect of the warning
		* between the misinformation and information conditions
		if "`outcome'" == "memcrit" {
			lincom warned_misinformed - warned_informed
		}
	}
 
*Table 2 and SI-9 (Memory Uncertainty) (note no difference when looking at dkna)
	* dkcrit is the Uncertainty Score for the experimental facts; dkbuf is the Uncertainty Score for the fixed facts
	local rhs warnmain misinformed informed  warned_misinformed warned_informed
	foreach outcome in dkcrit dkbuf {
		regress `outcome' `rhs' if drop2==0
			* Marginal effect of the warning within the misinformation, control and information conditions
			lincom warnmain + warned_misinformed
			lincom warnmain
			lincom warnmain + warned_informed
	}

*Table 3 and SI-10 (Credibility)
	* credvid_clean is Video Credibility; crednews_clean is News Article Credibility
	local rhs warnmain misinformed informed  warned_misinformed warned_informed
	foreach outcome in credvid_clean crednews_clean {
		regress `outcome' `rhs' if drop2==0
			* Marginal effect of the warning within the misinformation, control and information conditions
			lincom warnmain + warned_misinformed
			lincom warnmain
			lincom warnmain + warned_informed
	}

		
*Appendix Tables
*SI-1: Summary Statistics of Analyzed and Excluded Samples
		* Demographic summary statistics, first for the analyzed sample (drop2 == 0), then for the excluded sample (drop2 == 1)
		forvalues excluded = 0/1 {
			tabstat female lowcog white age educ inc attention newsdays know pid7 selfid if drop2==`excluded',  statistics(mean sd n)
		}
		

* Table SI-2: Model of Excluded Participants
* Logit of the exclusion flag (drop2) on participant characteristics, fitted on all participants with complete covariates.
* Requires the user-written estout package; uncomment the next line to install it.
*ssc install estout, replace
	logit drop2 female lowcog newsdays know age collegegrad married  attention  white pid7 selfid unemp over50k
	estimates store mod1
	* Plain-text table. The three reported statistics are N, AIC and the log likelihood, and the labels now say so.
	* The last format in fmt() is reused for any remaining statistic.
	estout mod1, label cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) starlevels(+ 0.10 * 0.05 ** 0.01) stats(N aic ll, fmt(%9.0f %9.3f) labels("N" "AIC" "Log likelihood")) varlabels(_cons Constant) legend  mlabels("Dropped")

	* Same table in TeX style
	estout mod1, label cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) starlevels(+ 0.10 * 0.05 ** 0.01) stats(N aic ll, fmt(%9.0f %9.3f) labels("N" "AIC" "Log likelihood")) varlabels(_cons Constant) legend style(tex) mlabels("Dropped")

*Table SI-3: Comparison of Survey Sample with Pop Benchmarks
			* Sex, race and age group by exclusion status
			tabstat female white age, by(drop2) statistics(count mean sd min max)

			* Education (q53): overall distribution, then by exclusion status with column percentages and a chi-squared test
			tabulate q53
			tabulate q53 drop2, column chi2

			tabulate female drop2, column chi2
			tabulate pid7 drop2, column chi2 missing
			* q45 is pid1m: 1=rep, 2=dem, 3=indp, 4=other
			tabulate q45 drop2, column chi2 missing
			tabstat female lowcog attention newsdays white know partisan pid7 selfid, by(drop2) statistics(mean min max count)



* Write the cleaned data (numeric codes rather than value labels) to CSV, overwriting any existing copy
export delimited using "tte_cleanedfopreplication.csv", nolabel replace
